2023-02-19
# Explore the built-in `cars` data set, then keep a working copy.
# Each call must be its own statement; fused on one line they do not parse.
head(cars)     # first rows
str(cars)      # structure: column names and types
summary(cars)  # per-column summary statistics
df_cars <- cars
`<-` と `_` と `%>%` と `` ` `` (back tick) の確認
View(cars) または、右上の Environment から、df_cars をクリック
?cars または Help 検索窓で cars, head など
おすすめ: Sys.setenv(LANG = "en")
R packages are extensions to the R statistical programming language containing code, data, and documentation in a standardised collection format. Users of R can install them with Tools > Install Packages in the top menu bar of RStudio.
Rパッケージは、Rの拡張機能で、コード、データ、ドキュメントを標準化されたコレクション形式で含んでおり、標準的なものは、RStudio の Top Bar の Tools > Install Packages からインストールできます。
tidyverse, rmarkdown, WDI
あとから使うので、ロードしておきます。最初に次のようなコードを実行します。右の三角を押します。
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ── ## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1 ## ✔ tibble 3.1.8 ✔ dplyr 1.1.0 ## ✔ tidyr 1.3.0 ✔ stringr 1.5.0 ## ✔ readr 2.1.4 ✔ forcats 1.0.0 ## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ## ✖ dplyr::filter() masks stats::filter() ## ✖ dplyr::lag() masks stats::lag()
library(WDI)
R Markdownはデータサイエンスのためのオーサリングフレームワーク。
コード(プログラム)とその実行結果、を記録・表示し、高品質のレポートの作成を可能にします。
R Notebook は、独立してインタラクティブに実行できるチャンクを持つR Markdownドキュメントの一つの形式で、入力のすぐ下に出力を表示することができます。
Moodle にも置いてありますが、下のリンクからも取得できます。 右上の、Code の Download Rmd から取得してください。
# Rank economies by GDP (indicator NY.GDP.MKTP.CD) in the latest year with data.
WDI(
  country = "all",
  indicator = c(gdp = "NY.GDP.MKTP.CD"),
  extra = TRUE
) %>%
  drop_na(gdp) %>%
  # keep the latest remaining year and drop rows whose income is "Aggregates"
  filter(year == max(year), income != "Aggregates") %>%
  drop_na(region) %>%
  arrange(desc(gdp))
# Six economies by name; the pipeline below selects them by ISO-2 code instead
# and does not read this vector.
chosen_countries <- c(
  "United States", "China", "Japan",
  "Germany", "United Kingdom", "India"
)

# GDP over time, one coloured line per country.
WDI(
  country = c("CN", "GB", "JP", "IN", "US", "DE"),
  indicator = c(gdp = "NY.GDP.MKTP.CD"),
  extra = TRUE
) %>%
  drop_na(gdp) %>%
  ggplot(aes(year, gdp, col = country)) +
  geom_line() +
  labs(title = "WDI NY.GDP.MKTP.CD: gdp")
# GDP growth rate over time for four economies, one coloured line per country.
WDI(
  country = c("CN", "IN", "JP", "US"),
  indicator = c(gdp_growth_rate = "NY.GDP.MKTP.KD.ZG"),
  extra = TRUE
) %>%
  drop_na(gdp_growth_rate) %>%
  ggplot(aes(year, gdp_growth_rate, col = country)) +
  geom_line() +
  # a single string needs no paste()
  labs(title = "WDI NY.GDP.MKTP.KD.ZG: gdp growth rate")
The World Development Indicators is a compilation of relevant, high-quality, and internationally comparable statistics about global development and the fight against poverty. The database contains 1,400 time series indicators for 217 economies and more than 40 country groups, with data for many indicators going back more than 50 years.
WDIは、世界の開発状況と、貧困との戦いに関する、適切で上質、かつ、国際的に比較可能な時系列の統計データを編纂したものです。このデータベースは、217の経済と40以上の国グループについて1,400の時系列指標を含み、指標のデータの多くは50年以上前に遡ることができます。
いくつか、リストしてみましょう。
WDI パッケージで、データをダウンロードしたり、探したり、詳細情報を得たりできます。
# Search the indicator names for "gdp".
WDIsearch(
  string = "gdp",
  field = "name",
  short = TRUE,
  cache = NULL
)
# Search the indicator codes for "NY.GDP.MKTP.CD".
WDIsearch(
  string = "NY.GDP.MKTP.CD",
  field = "indicator",
  short = TRUE,
  cache = NULL
)
名前で検索(“” の間に、(なるべく簡単な)検索文字列を入れてください。)
# Template: put a (preferably short) search string between the quotes.
WDIsearch(
  string = "",
  field = "name",
  short = TRUE,
  cache = NULL
)
Indicator で検索(“” の間に、調べたい indicator を入れてください。)
# Template: put the indicator code to look up between the quotes.
WDIsearch(
  string = "",
  field = "indicator",
  short = TRUE,
  cache = NULL
)
short = FALSE とします。時間がかかるので、検索は、Indicator と、名前などの情報をもったファイルを手元に持っておくことにします。
# Keep a local copy of the indicator and country information so that later
# searches and downloads can reuse it via `cache = wdi_cache`.
wdi_cache <- WDI::WDIcache()
右上の窓枠(pane)から、wdi_cache を探して、中身を見てみましょう。series と、country の二つのデータ・フレームからなっているリストです。三角印や、右から二番目の巻物のようなアイコンをクリックすると中身が見えます。
# Detailed (short = FALSE) search of indicator names, using the local cache.
WDIsearch(
  string = "CPI Price",
  field = "name",
  short = FALSE,
  cache = wdi_cache
)
# Detailed search by indicator code, using the local cache.
WDIsearch(
  string = "NY.GDP.MKTP.KD.ZG",
  field = "indicator",
  short = FALSE,
  cache = wdi_cache
)
string と、field を、ふたつとも入れてください。
# Template: fill in BOTH `string` and `field` before running.
WDIsearch(
  string = "",
  field = "",
  short = FALSE,
  cache = wdi_cache
)
Indicator が決まったら、ダウンロードします。
# Open the help page for WDI().
help("WDI")
# Download GDP (indicator NY.GDP.MKTP.CD) in four progressively richer ways and
# print each result. The assignment and the print must be separate statements;
# fused on one line they do not parse.

# 1. Indicator given by code only (unnamed).
df_gdp1 <- WDI(country = "all", indicator = "NY.GDP.MKTP.CD")
df_gdp1

# 2. Named indicator: the value column is called `gdp`.
df_gdp2 <- WDI(country = "all", indicator = c(gdp = "NY.GDP.MKTP.CD"))
df_gdp2

# 3. Also request the extra country columns, reusing the local cache.
df_gdp3 <- WDI(
  country = "all",
  indicator = c(gdp = "NY.GDP.MKTP.CD"),
  extra = TRUE,
  cache = wdi_cache
)
df_gdp3

# 4. Same as 3, restricted to six economies selected by ISO-2 code.
df_gdp4 <- WDI(
  country = c("CN", "GB", "JP", "IN", "US", "DE"),
  indicator = c(gdp = "NY.GDP.MKTP.CD"),
  extra = TRUE,
  cache = wdi_cache
)
df_gdp4
# Fetch two price indicators for every economy, with the extra country columns.
df_gdp21 <- WDI(
  country = "all",
  indicator = c(
    gdp_deflator = "NY.GDP.DEFL.KD.ZG",
    cpi_price = "CPTOTNSXN"
  ),
  extra = TRUE,
  cache = wdi_cache
)
df_gdp21       # print the data
str(df_gdp21)  # column names and types
## 'data.frame': 23972 obs. of 14 variables: ## $ country : chr "Advanced Economies" "Advanced Economies" "Advanced Economies" "Advanced Economies" ... ## $ iso2c : chr "AME" "AME" "AME" "AME" ... ## $ iso3c : chr "" "" "" "" ... ## $ year : int 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 ... ## $ status : chr "" "" "" "" ... ## $ lastupdated : chr "2020-07-27" "2020-07-27" "2020-07-27" "2020-07-27" ... ## $ gdp_deflator: num NA NA NA NA NA NA NA NA NA NA ... ## ..- attr(*, "label")= chr "Inflation, GDP deflator (annual %)" ## $ cpi_price : num 58.7 60.5 63 66 69.1 ... ## ..- attr(*, "label")= chr "CPI Price,not seas.adj,,," ## $ region : chr NA NA NA NA ... ## $ capital : chr NA NA NA NA ... ## $ longitude : chr NA NA NA NA ... ## $ latitude : chr NA NA NA NA ... ## $ income : chr NA NA NA NA ... ## $ lending : chr NA NA NA NA ...
# Per-column summary of the downloaded data.
df_gdp21 %>% summary()
## country iso2c iso3c year ## Length:23972 Length:23972 Length:23972 Min. :1960 ## Class :character Class :character Class :character 1st Qu.:1982 ## Mode :character Mode :character Mode :character Median :1996 ## Mean :1995 ## 3rd Qu.:2009 ## Max. :2021 ## ## status lastupdated gdp_deflator cpi_price ## Length:23972 Length:23972 Min. : -98.704 Min. : 0.00 ## Class :character Class :character 1st Qu.: 2.317 1st Qu.: 55.95 ## Mode :character Mode :character Median : 5.273 Median : 83.28 ## Mean : 25.308 Mean : 84.18 ## 3rd Qu.: 10.411 3rd Qu.:108.75 ## Max. :26765.858 Max. :551.25 ## NA's :11616 NA's :18410 ## region capital longitude latitude ## Length:23972 Length:23972 Length:23972 Length:23972 ## Class :character Class :character Class :character Class :character ## Mode :character Mode :character Mode :character Mode :character ## ## ## ## ## income lending ## Length:23972 Length:23972 ## Class :character Class :character ## Mode :character Mode :character ## ## ## ##
右上の窓枠の、Environment も見てみましょう。
グラフ(Chart)を描いて視覚化しよう
# Plot GDP per country straight from df_gdp4, missing values included.
ggplot(df_gdp4, aes(year, gdp, col = country)) +
  geom_line()
## Warning: Removed 10 rows containing missing values (`geom_line()`).
# Same plot after dropping rows with missing gdp, which removes the
# missing-value warning from geom_line().
df_gdp4 %>%
  drop_na(gdp) %>%
  ggplot(aes(year, gdp, col = country)) +
  geom_line() +
  # Plain string: paste() with its default sep = " " put two spaces before "gdp".
  labs(title = "WDI - NY.GDP.MKTP.CD: gdp")
Line Plot with one indicator with abbreviation and one country
# Template: one indicator (with a short name) for one country.
chosen_indicator <- "SL.UEM.TOTL.NE.ZS"
short_name <- "unemployment"
chosen_country <- "United States"

# Inside c(), `short_name = ...` names the downloaded column literally
# "short_name"; the variable short_name is used only for the title and y label.
WDI(
  country = "all",
  indicator = c(short_name = chosen_indicator),
  extra = TRUE,
  cache = wdi_cache
) %>%
  filter(country == chosen_country) %>%
  # drop missing values, as the several-countries template does, so that
  # geom_line() does not warn about removed rows
  drop_na(short_name) %>%
  ggplot(aes(year, short_name)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces and put one
  # before the colon
  labs(
    title = paste0(
      "WDI ", chosen_indicator, ": ", short_name, " - ", chosen_country
    ),
    y = short_name
  )
Line Plot with one indicator and one country
# Template: one indicator (no short name) for one country.
chosen_indicator <- "SL.UEM.TOTL.NE.ZS"
chosen_country <- "United States"

# Inside c(), `chosen_indicator = ...` names the downloaded column literally
# "chosen_indicator"; the variable's value (the code) is used for the labels.
WDI(
  country = "all",
  indicator = c(chosen_indicator = chosen_indicator),
  extra = TRUE,
  cache = wdi_cache
) %>%
  filter(country == chosen_country) %>%
  # drop missing values so that geom_line() does not warn about removed rows
  drop_na(chosen_indicator) %>%
  ggplot(aes(year, chosen_indicator)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces in the title
  labs(
    title = paste0("WDI ", chosen_indicator, " - ", chosen_country),
    y = chosen_indicator
  )
Line Plot with one indicator with abbreviation and several countries
# Template: one indicator (with a short name) for several countries.
chosen_indicator <- "SL.UEM.TOTL.NE.ZS"
short_name <- "unemployment"
chosen_countries <- c("United States", "United Kingdom", "Japan")

# Inside c(), `short_name = ...` names the downloaded column literally
# "short_name"; the variable short_name is used only for the title and y label.
WDI(
  country = "all",
  indicator = c(short_name = chosen_indicator),
  extra = TRUE,
  cache = wdi_cache
) %>%
  drop_na(short_name) %>%
  filter(country %in% chosen_countries) %>%
  ggplot(aes(year, short_name, col = country)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces and put one
  # before the colon
  labs(
    title = paste0("WDI ", chosen_indicator, ": ", short_name),
    y = short_name
  )
Line Plot with two indicators with abbreviation and one country
# Template: two indicators (with short names) for one country.
chosen_indicator_1 <- "NY.GDP.DEFL.KD.ZG"
short_name_1 <- "gdp_deflator"
chosen_indicator_2 <- "CPTOTSAXNZGY"
short_name_2 <- "cpi_price"
chosen_country <- "United States"

# The downloaded columns are literally named "short_name_1" and "short_name_2";
# the variables of the same names supply the title text and legend labels.
WDI(
  country = "all",
  indicator = c(
    short_name_1 = chosen_indicator_1,
    short_name_2 = chosen_indicator_2
  ),
  extra = TRUE,
  cache = wdi_cache
) %>%
  filter(country == chosen_country) %>%
  # long format: one row per year and indicator, so colour can map to `class`
  pivot_longer(
    c(short_name_1, short_name_2),
    names_to = "class",
    values_to = "value"
  ) %>%
  drop_na(value) %>%
  ggplot(aes(year, value, col = class)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces and padded "\n"
  labs(
    title = paste0(
      "WDI ", chosen_indicator_1, ": ", short_name_1, "\n",
      chosen_indicator_2, ": ", short_name_2, " - ", chosen_country
    )
  ) +
  # replace the literal class names in the legend with the short names
  scale_color_manual(
    labels = c(short_name_1, short_name_2),
    values = scales::hue_pal()(2)
  )
# Two indicators (short names "male" and "female") for one country.
chosen_indicator_1 <- "SL.TLF.CACT.MA.NE.ZS"
short_name_1 <- "male"
chosen_indicator_2 <- "SL.TLF.CACT.FE.NE.ZS"
short_name_2 <- "female"
chosen_country <- "United States"

# The downloaded columns are literally named "short_name_1" and "short_name_2";
# the variables of the same names supply the title text and legend labels.
WDI(
  country = "all",
  indicator = c(
    short_name_1 = chosen_indicator_1,
    short_name_2 = chosen_indicator_2
  ),
  extra = TRUE,
  cache = wdi_cache
) %>%
  filter(country == chosen_country) %>%
  # long format: one row per year and indicator, so colour can map to `class`
  pivot_longer(
    c(short_name_1, short_name_2),
    names_to = "class",
    values_to = "value"
  ) %>%
  drop_na(value) %>%
  ggplot(aes(year, value, col = class)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces and padded "\n"
  labs(
    title = paste0(
      "WDI ", chosen_indicator_1, ": ", short_name_1, "\n",
      chosen_indicator_2, ": ", short_name_2, " - ", chosen_country
    )
  ) +
  # replace the literal class names in the legend with the short names
  scale_color_manual(
    labels = c(short_name_1, short_name_2),
    values = scales::hue_pal()(2)
  )
Line Plot with two indicators with abbreviation and several countries
# Template: two indicators (with short names) for several countries.
chosen_indicator_1 <- "NY.GDP.DEFL.KD.ZG"
short_name_1 <- "gdp_deflator"
chosen_indicator_2 <- "CPTOTSAXNZGY"
short_name_2 <- "cpi_price"
chosen_countries <- c("United States", "France", "Japan")

# The downloaded columns are literally named "short_name_1" and "short_name_2";
# the variables of the same names supply the title text and legend labels.
WDI(
  country = "all",
  indicator = c(
    short_name_1 = chosen_indicator_1,
    short_name_2 = chosen_indicator_2
  ),
  extra = TRUE,
  cache = wdi_cache
) %>%
  filter(country %in% chosen_countries) %>%
  # long format: one row per country, year and indicator
  pivot_longer(
    c(short_name_1, short_name_2),
    names_to = "class",
    values_to = "value"
  ) %>%
  drop_na(value) %>%
  # colour distinguishes countries, line type distinguishes indicators
  ggplot(aes(year, value, linetype = class, col = country)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces and padded "\n"
  labs(
    title = paste0(
      "WDI ", chosen_indicator_1, ": ", short_name_1, "\n",
      chosen_indicator_2, ": ", short_name_2
    )
  ) +
  # replace the literal class names in the legend with the short names
  scale_linetype_manual(
    labels = c(short_name_1, short_name_2),
    values = c("solid", "dashed")
  )
# Two indicators (short names "male" and "female") for several countries.
chosen_indicator_1 <- "SL.TLF.CACT.MA.NE.ZS"
short_name_1 <- "male"
chosen_indicator_2 <- "SL.TLF.CACT.FE.NE.ZS"
short_name_2 <- "female"
chosen_countries <- c("United States", "France", "Japan")

# The downloaded columns are literally named "short_name_1" and "short_name_2";
# the variables of the same names supply the title text and legend labels.
WDI(
  country = "all",
  indicator = c(
    short_name_1 = chosen_indicator_1,
    short_name_2 = chosen_indicator_2
  ),
  extra = TRUE,
  cache = wdi_cache
) %>%
  filter(country %in% chosen_countries) %>%
  # long format: one row per country, year and indicator
  pivot_longer(
    c(short_name_1, short_name_2),
    names_to = "class",
    values_to = "value"
  ) %>%
  drop_na(value) %>%
  # colour distinguishes countries, line type distinguishes indicators
  ggplot(aes(year, value, linetype = class, col = country)) +
  geom_line() +
  # paste0(): paste()'s default sep = " " doubled the spaces and padded "\n"
  labs(
    title = paste0(
      "WDI ", chosen_indicator_1, ": ", short_name_1, "\n",
      chosen_indicator_2, ": ", short_name_2
    )
  ) +
  # replace the literal class names in the legend with the short names
  scale_linetype_manual(
    labels = c(short_name_1, short_name_2),
    values = c("solid", "dashed")
  )
上のテンプレートをコピーして、下に貼り付け、指標 indicator と、略称 short_name と、いくつかの国名 chosen_countries を、入れ替えて、試してみてください。